import pandas as pd
import urllib
import numpy as np
import urllib.request
import re
from textblob import TextBlob
%run lib.py
# Screenplay to analyse: the file stem requested from dailyscript.com below.
# Alternative stems kept from earlier runs (assign one to `name` to switch):
#   "Legally%20Blonde", "aboutmary", "10Things", "Friday%20The%2013th",
#   "Ghost%20Ship", "Juno", "Reservoir+Dogs", "shawshank",
#   "Sixth%20Sense,%20The", "sunset_bld_3_21_49", "Titanic", "toy_story",
#   "trainspotting", "transformers", "the-truman-show_shooting",
#   "batman_production"
name = "magnolia"

# Stems listed here are requested with a ".txt" extension; all others use ".html".
txtfiles = ["Ghost%20Ship", "Legally%20Blonde", "Friday%20The%2013th", "Juno", "Reservoir+Dogs", "Sixth%20Sense,%20The", "Titanic"]
ext = "txt" if name in txtfiles else "html"

# The context manager closes the HTTP response even when read() raises,
# which the previous explicit fp.close() did not guarantee.
with urllib.request.urlopen("http://www.dailyscript.com/scripts/" + name + "." + ext) as fp:
    mybytes = fp.read()

# Bytes that are not valid UTF-8 are dropped instead of aborting the run.
mystr = mybytes.decode("utf8", "ignore")

# One entry per line of the download, with carriage returns removed first and
# anything that looks like an HTML tag removed second.
liston = [re.sub(r'<[^<]+?>', '', text.replace('\r', '')) for text in mystr.split("\n")]

# The shawshank text has its tabs turned into single spaces so that the
# space-based prefix matching applied later can work on it.
if name == "shawshank":
    liston = [line.replace("\t", " ") for line in liston]
def _starts_field(line, prefix):
    """Return True when *line* begins with *prefix* and the next character is not a space.

    A line that is no longer than the prefix yields False instead of raising
    IndexError, matching the length guard the scene-heading test already had.
    """
    return (
        len(line) > len(prefix)
        and line.startswith(prefix)
        and line[len(prefix)] != " "
    )


# Name of the character currently speaking ("" while nobody is).
char = ""
# One dict per speech: keys 'char', 'dialogue', 'scene', 'adverb'.
script = []

# nextbigchunk is defined outside this section (presumably in lib.py). As used
# here it takes the line list and an index and returns (next index, prefix
# string). 45 is the index handed to the first call -- presumably chosen to
# skip the page header; confirm against lib.py.
i = 45
print("Characters")
i, charintro = nextbigchunk(liston, i)
print("Adverbs")
i, adverb = nextbigchunk(liston, i, adverbs=True)
print("Dialogues")
i, dialoguepre = nextbigchunk(liston, i)
print("New Scene:")
i, newscenepre = nextbigchunk(liston, i)
# An "X" result triggers one more attempt from index 100 -- presumably "X" is
# nextbigchunk's "nothing found" marker; confirm against lib.py.
if newscenepre == "X":
    i = 100
    i, newscenepre = nextbigchunk(liston, i)
# Hand-tuned override for this one script: a 55-space scene-heading prefix.
if name == "aboutmary":
    newscenepre = " " * 55
# A scene prefix as long as the character prefix cannot be told apart from
# it, so scene detection is switched to the "X" marker instead.
if len(newscenepre) == len(charintro):
    newscenepre = "X"
endofdialogue = newscenepre

scene = 1
for s in liston:
    stripped = s.strip()
    opens_paren = stripped.startswith("(")
    closes_paren = stripped.endswith(")")

    # Character cue: starts a new speech record. A speech still open at this
    # point (no blank line in between) is replaced without being stored.
    if _starts_field(s, charintro) and not opens_paren and not closes_paren:
        char = s[len(charintro):]
        new = {
            'char': char.strip(),
            'dialogue': "",
            'scene': scene,
            'adverb': "",
        }

    # A blank line (or a line equal to the end marker) closes the open speech.
    if s == endofdialogue or s.replace(" ", "") == "":
        if char != "":
            char = ""
            script.append(new)

    # Dialogue lines are concatenated with single spaces.
    if char != "" and _starts_field(s, dialoguepre):
        if new['dialogue'] != "":
            new['dialogue'] = new['dialogue'] + " "
        new['dialogue'] = new['dialogue'] + s[len(dialoguepre):]

    # Parentheticals: either at the adverb indent or any "( ... )" line.
    if char != "" and (_starts_field(s, adverb) or (opens_paren and closes_paren)):
        if new['adverb'] != "":
            new['adverb'] = new['adverb'] + " "
        new['adverb'] = new['adverb'] + s[len(adverb):]

    # Upper-case line at the scene indent: advance the scene counter.
    if _starts_field(s, newscenepre) and s.isupper():
        scene = scene + 1

# Store a speech that was still open when the text ended; previously it was
# lost unless the download happened to finish on a blank line.
if char != "":
    char = ""
    script.append(new)

pd.DataFrame(script).to_csv(name + '.csv', index=None)
pd.DataFrame(script)
from collections import Counter

# Reload the parsed speeches written to <name>.csv.
magnolia = pd.read_csv(name + '.csv')
# getstopwords is defined outside this section; the result is kept for later cells.
stopwords = getstopwords()

# Cue suffixes stripped so that variants of a name count as one character.
# They are applied in list order.
removedchars = ["'S VOICE", "'S WHISPER VOICE", " GATOR"]
for s in removedchars:
    magnolia['char'] = magnolia['char'].apply(lambda x: x.replace(s, ""))

scene_ids = magnolia['scene'].tolist()
speakers = magnolia['char'].tolist()

# scenes: scene id -> list of the distinct speakers in that scene.
# Speakers are kept in order of first appearance, so the result no longer
# depends on string hash ordering and is the same on every run.
cast_by_scene = {}
for scene_id, speaker in zip(scene_ids, speakers):
    cast_by_scene.setdefault(scene_id, {})[speaker] = None
scenes = {scene_id: list(cast) for scene_id, cast in cast_by_scene.items()}

# Every distinct speaker, in order of first appearance.
characters = list(dict.fromkeys(speakers))

# appearances: speaker -> number of speech rows in the CSV.
appearances = dict(Counter(speakers))

# The ten most frequent speakers. sorted() is stable, so equal counts keep
# first-appearance order. This replaces Series.sort_values(0, ...), which
# raises TypeError on pandas >= 2.0 where the arguments are keyword-only.
finalcharacters = sorted(appearances, key=appearances.get, reverse=True)[:10]
finalcharacters
from collections import Counter

# Number of speech rows per scene, counted once instead of filtering the
# DataFrame again for every (character, character, scene) combination.
scene_lines = Counter(magnolia["scene"].tolist())

# couplesappearances: "<f>_<s>" -> total speech rows of the scenes that both
# characters appear in. Only pairs where f ranks before s in finalcharacters
# are counted; every other key stays at 0.
couplesappearances = dict()

# <name>_nodes.csv: ';'-separated matrix with a header row of character names
# and one row per character. Only the cells below the diagonal can be non-zero.
with open(name + "_nodes.csv", "w") as file:
    file.write("".join(";" + s for s in finalcharacters))
    file.write("\n")
    for row_pos, s in enumerate(finalcharacters):
        newlist = []
        for col_pos, f in enumerate(finalcharacters):
            shared = 0
            if col_pos < row_pos:
                shared = sum(
                    scene_lines[p]
                    for p, cast in scenes.items()
                    if f in cast and s in cast
                )
            newlist.append(shared)
            couplesappearances[f + "_" + s] = shared
        file.write(s)
        file.write("".join(";" + str(value) for value in newlist))
        file.write("\n")

# The four pairs sharing the most dialogue. sorted() is stable, so ties keep
# insertion order. This replaces Series.sort_values(0, ...), which raises
# TypeError on pandas >= 2.0, and it also works when there are no pairs.
finalcouples = sorted(couplesappearances, key=couplesappearances.get, reverse=True)[:4]

# One name (or "<f>_<s>" pair key) per line.
with open(name + "_finalcharacters.csv", "w") as file:
    file.writelines(s + "\n" for s in finalcharacters)

with open(name + "_finalcouples.csv", "w") as file:
    file.writelines(s + "\n" for s in finalcouples)
# Characters with more than 10 speech rows.
importantchars = [char for char in appearances if appearances[char] > 10]

# Plot setup is done once rather than on every loop iteration. This is the
# plain-Python spelling of the `%matplotlib inline` notebook magic, so it
# still needs an IPython/Jupyter session.
get_ipython().run_line_magic("matplotlib", "inline")
import matplotlib.pyplot as plt

# getdialogue, getsentiment and maverage are defined outside this section.
# Both output files are closed even if one of those helpers raises.
with open(name + "_sentiment_overtime_individual.csv", "w") as file, \
        open(name + "_sentiment_overtime_individualminsmaxs.csv", "w") as file2:
    for k in finalcharacters:
        print(k)
        dd = [str(d) for d in getdialogue(magnolia, k, k, scenes)]
        polarities, subjectivities = getsentiment(dd)
        moveda = maverage(polarities, dd, .99)
        plt.plot(moveda)

        # One row per point: character, relative position in [0, 1), value.
        for i, s in enumerate(moveda):
            file.write(k + "," + str(float(i) / len(moveda)) + ", " + str(s) + "\n")
        plt.ylabel('polarities')
        plt.show()

        # Dialogue lines at the lowest and highest point of the curve.
        # Each is looked up once and reused for the file and the console.
        lowest = dd[moveda.index(np.min(moveda))]
        highest = dd[moveda.index(np.max(moveda))]
        file2.write(k + "| MIN| " + lowest + "\n")
        file2.write(k + "| MAX| " + highest + "\n")
        print("MIN: " + lowest)
        print("\n")
        print("MAX: " + highest)
# Plot setup is done once rather than on every loop iteration. This is the
# plain-Python spelling of the `%matplotlib inline` notebook magic, so it
# still needs an IPython/Jupyter session.
get_ipython().run_line_magic("matplotlib", "inline")
import matplotlib.pyplot as plt

# getdialogue, getsentiment and maverage are defined outside this section.
# Both output files are closed even if one of those helpers raises.
with open(name + "_sentiment_overtime_couples.csv", "w") as file, \
        open(name + "_sentiment_overtime_couplesminsmaxs.csv", "w") as file2:
    for k in finalcouples:
        print(k)
        # Couple keys have the form "<first>_<second>". A dedicated name is
        # used so the script-line list `liston` is no longer overwritten.
        pair = k.split("_")
        dd = [str(d) for d in getdialogue(magnolia, pair[0], pair[1], scenes)]
        polarities, subjectivities = getsentiment(dd)
        moveda = maverage(polarities, dd, .99)
        plt.plot(moveda)

        # One row per point: couple key, relative position in [0, 1), value.
        for i, s in enumerate(moveda):
            file.write(k + "," + str(float(i) / len(moveda)) + ", " + str(s) + "\n")
        plt.ylabel('polarities')
        plt.show()

        # Dialogue lines at the lowest and highest point of the curve.
        # Each is looked up once and reused for the file and the console.
        lowest = dd[moveda.index(np.min(moveda))]
        highest = dd[moveda.index(np.max(moveda))]
        file2.write(k + "| MIN| " + lowest + "\n")
        file2.write(k + "| MAX| " + highest + "\n")
        print("MIN: " + lowest)
        print("\n")
        print("MAX: " + highest)
# Tag every scene member with an "INSCENE_" prefix so that presence in a
# scene is a different basket item from speaking. The lists are updated in
# place, without removing and appending while iterating over them.
# Running this a second time prefixes the names again.
for key in scenes:
    scenes[key][:] = ["INSCENE_" + member for member in scenes[key]]

# Only rows that actually have dialogue can produce a basket. The dropna
# result used to be discarded; it is now the frame that gets iterated.
spoken = magnolia.dropna(subset=['dialogue'])

baskets = []
# Punctuation removed from each word.
spchars = ["\"", "'", ".", ",", "-"]
# Punctuation kept as its own basket item and then removed from the word.
attributes = ["?", "!"]

for _, row in spoken.iterrows():
    dialogue = row['dialogue']
    # Skip empty text and anything that is not text.
    if not isinstance(dialogue, str) or len(dialogue) == 0:
        continue

    # Basket = everyone in the scene + the speaker + the words spoken.
    new = list(scenes[row['scene']])
    new.append("SPEAKING_" + row['char'])
    for k in dialogue.split(" "):
        ko = k
        for t in spchars:
            ko = ko.replace(t, "")
        for t in attributes:
            if t in ko:
                new.append(t)
                ko = ko.replace(t, "")
        if len(ko) > 0:
            new.append(ko.lower())

    # De-duplicate in first-occurrence order, so basket contents and the
    # column order derived from them are the same on every run.
    baskets.append(list(dict.fromkeys(new)))
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# baskets2: one {item: 1} dict per basket, later turned into a one-hot frame.
# basketslist: the same baskets as plain lists. Stopwords are removed from both.
baskets2 = []
basketslist = []
for k in baskets:
    kept = [t for t in k if t not in stopwords]
    baskets2.append({t: 1 for t in kept})
    basketslist.append(kept)

# Items missing from a basket become 0. The CSV keeps the numeric 0/1 encoding.
baskets2 = pd.DataFrame(baskets2).fillna(0)
baskets2.to_csv(name + '_basket.csv')

# An itemset must occur in at least 5 baskets. apriori gets a boolean view of
# the same table, which is the input type mlxtend documents.
frequent_itemsets = apriori(baskets2.astype(bool), min_support=5/len(baskets2), use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Older mlxtend releases name this column 'antecedants'; current ones use
# 'antecedents'. Use whichever is present instead of raising KeyError.
antecedent_col = 'antecedents' if 'antecedents' in rules.columns else 'antecedants'

# alllower is defined outside this section; its result is stored as 0/1.
rules['one_lower'] = [int(alllower(i) or alllower(j)) for i, j in zip(rules[antecedent_col], rules['consequents'])]
rules['both_lower'] = [int(alllower(i) and alllower(j)) for i, j in zip(rules[antecedent_col], rules['consequents'])]
rules.to_csv(name + '_rules.csv', index=None)
| . |
|---|
| RESERVOIR DOGS |
| Palabras Distintas |
|---|
| 1650 |
| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.774306 | 11.6% |
| sentiment | Porcentaje |
|---|---|
| negative | 62.8% |
| positive | 37.2% |
| sentiment | Porcentaje |
|---|---|
| negative | 19.7% |
| positive | 13.5% |
| fear | 10.8% |
| trust | 10.1% |
| sadness | 9.9% |
| anger | 9.6% |
| anticipation | 8.0% |
| disgust | 7.6% |
| joy | 6.0% |
| surprise | 4.8% |
| sentiment | Porcentaje |
|---|---|
| negative | 52.2% |
| uncertainty | 24.6% |
| positive | 19.7% |
| litigious | 3.0% |
| constraining | 0.5% |
[1] “Análisis de Sentimientos del Personaje: MR. WHITE”

[1] “Número total de Palabras Únicas en el texto: 589”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.705556 | 14.4% |
| sentiment | Porcentaje |
|---|---|
| negative | 67.1% |
| positive | 32.9% |
| sentiment | Porcentaje |
|---|---|
| negative | 22.2% |
| fear | 13.4% |
| anger | 11.7% |
| positive | 11.1% |
| sadness | 10.5% |
| disgust | 8.2% |
| trust | 8.0% |
| anticipation | 6.4% |
| surprise | 4.9% |
| joy | 3.5% |
| sentiment | Porcentaje |
|---|---|
| negative | 50% |
| positive | 24% |
| uncertainty | 24% |
| litigious | 2% |
[1] “Análisis de Sentimientos del Personaje: MR. PINK”

[1] “Número total de Palabras Únicas en el texto: 655”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.723926 | 10.8% |
| sentiment | Porcentaje |
|---|---|
| negative | 59.6% |
| positive | 40.4% |
| sentiment | Porcentaje |
|---|---|
| negative | 17.0% |
| positive | 14.6% |
| fear | 12.3% |
| sadness | 10.3% |
| trust | 10.3% |
| anger | 8.8% |
| anticipation | 8.6% |
| joy | 7.1% |
| disgust | 5.8% |
| surprise | 5.2% |
| sentiment | Porcentaje |
|---|---|
| negative | 45.1% |
| uncertainty | 35.3% |
| positive | 19.6% |
[1] “Análisis de Sentimientos del Personaje: JOE”

[1] “Número total de Palabras Únicas en el texto: 513”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 4 | 11.5% |
| sentiment | Porcentaje |
|---|---|
| negative | 58.7% |
| positive | 41.3% |
| sentiment | Porcentaje |
|---|---|
| negative | 19.7% |
| positive | 13.1% |
| anticipation | 11.8% |
| sadness | 10.2% |
| trust | 10.2% |
| fear | 7.5% |
| joy | 7.5% |
| disgust | 7.2% |
| anger | 6.9% |
| surprise | 5.9% |
| sentiment | Porcentaje |
|---|---|
| negative | 59.4% |
| positive | 15.6% |
| uncertainty | 15.6% |
| litigious | 9.4% |
[1] “Análisis de Sentimientos del Personaje: MR. ORANGE”

[1] “Número total de Palabras Únicas en el texto: 490”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.814159 | 13.1% |
| sentiment | Porcentaje |
|---|---|
| negative | 71.3% |
| positive | 28.7% |
| sentiment | Porcentaje |
|---|---|
| negative | 19.0% |
| positive | 13.0% |
| fear | 12.1% |
| trust | 12.1% |
| sadness | 11.1% |
| anger | 9.5% |
| disgust | 8.6% |
| anticipation | 6.7% |
| joy | 4.8% |
| surprise | 3.2% |
| sentiment | Porcentaje |
|---|---|
| negative | 56% |
| uncertainty | 32% |
| positive | 12% |
[1] “Análisis de Sentimientos del Personaje: EDDIE”

[1] “Número total de Palabras Únicas en el texto: 525”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.533333 | 10.7% |
| sentiment | Porcentaje |
|---|---|
| negative | 59.4% |
| positive | 40.6% |
| sentiment | Porcentaje |
|---|---|
| negative | 18.3% |
| positive | 14.9% |
| trust | 11.6% |
| fear | 10.0% |
| sadness | 10.0% |
| anger | 9.5% |
| anticipation | 7.9% |
| disgust | 7.1% |
| joy | 6.6% |
| surprise | 4.1% |
| sentiment | Porcentaje |
|---|---|
| negative | 55.6% |
| positive | 16.7% |
| uncertainty | 16.7% |
| constraining | 5.6% |
| litigious | 5.6% |
[1] “Análisis de Sentimientos del Personaje: MR. BLONDE”

[1] “Número total de Palabras Únicas en el texto: 324”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 4.126984 | 13.6% |
| sentiment | Porcentaje |
|---|---|
| negative | 53.06% |
| positive | 46.94% |
| sentiment | Porcentaje |
|---|---|
| negative | 22.0% |
| positive | 12.8% |
| anger | 11.0% |
| disgust | 9.1% |
| fear | 9.1% |
| trust | 9.1% |
| joy | 7.3% |
| surprise | 7.3% |
| anticipation | 6.7% |
| sadness | 5.5% |
| sentiment | Porcentaje |
|---|---|
| negative | 50.0% |
| positive | 33.3% |
| uncertainty | 16.7% |
[1] “Análisis de Sentimientos del Personaje: HOLDAWAY”

[1] “Número total de Palabras Únicas en el texto: 170”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 4.583333 | 11.2% |
| sentiment | Porcentaje |
|---|---|
| positive | 52.63% |
| negative | 47.37% |
| sentiment | Porcentaje |
|---|---|
| positive | 22.2% |
| negative | 20.4% |
| anger | 11.1% |
| fear | 11.1% |
| trust | 9.3% |
| disgust | 7.4% |
| sadness | 7.4% |
| joy | 5.6% |
| anticipation | 3.7% |
| surprise | 1.9% |
| sentiment | Porcentaje |
|---|---|
| negative | 42.9% |
| positive | 42.9% |
| uncertainty | 14.3% |
[1] “Análisis de Sentimientos del Personaje: NICE GUY EDDIE”

[1] “Número total de Palabras Únicas en el texto: 163”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 4 | 8.59% |
| sentiment | Porcentaje |
|---|---|
| negative | 75% |
| positive | 25% |
| sentiment | Porcentaje |
|---|---|
| negative | 21.3% |
| positive | 14.9% |
| anticipation | 10.6% |
| trust | 10.6% |
| anger | 8.5% |
| disgust | 8.5% |
| joy | 8.5% |
| sadness | 8.5% |
| fear | 4.3% |
| surprise | 4.3% |
| sentiment | Porcentaje |
|---|---|
| uncertainty | 66.7% |
| litigious | 33.3% |
[1] “Análisis de Sentimientos del Personaje: MR. BROWN”

[1] “Número total de Palabras Únicas en el texto: 152”

| Descripción | Score | % Words Found |
|---|---|---|
| Entre 0 (negativo) y 10 (positivo) | 3.294118 | 14.5% |
| sentiment | Porcentaje |
|---|---|
| negative | 70% |
| positive | 30% |
| sentiment | Porcentaje |
|---|---|
| negative | 21.5% |
| positive | 16.9% |
| trust | 12.3% |
| anticipation | 9.2% |
| fear | 9.2% |
| sadness | 9.2% |
| anger | 7.7% |
| disgust | 6.2% |
| joy | 6.2% |
| surprise | 1.5% |
| sentiment | Porcentaje |
|---|---|
| negative | 83.3% |
| positive | 16.7% |
| Personaje | Min_Max | Diálogo |
|---|---|---|
| MR. WHITE | MIN | Hey, just cancel that shit right now! You’re hurt. You’re hurt really fucking bad, but you ain’t dying. |
| MR. WHITE | MAX | Piss on this turd, we’re outta here. |
| MR. PINK | MIN | Yeah, and that was a fucking miracle. But if they did get away, where the fuck are they? |
| MR. PINK | MAX | These ladies aren’t starvin’ to death. They make minimum wage. When I worked for minimum wage, I wasn’t lucky enough to have a job that society deemed tipworthy. |
| EDDIE | MIN | Holy shit, this guy’s all fucked up! |
| EDDIE | MAX | Ahh, now we’re getting down to it. It’s not just that he’s a cheap bastard - |
| JOE | MIN | No, she did it. She killed the cheatin’ wife, too. |
| JOE | MAX | Come in. |
| MR. ORANGE | MIN | I’m sorry, I’m so sorry. |
| MR. ORANGE | MAX | What? |
| MR. BLONDE | MIN | I said: “Are you gonna bark all day, dog, or are you gonna bite.” |
| MR. BLONDE | MAX | “Clowns to the left of me, Jokers to the right. Here I am, stuck in the middle with you.” |
| HOLDAWAY | MIN | It’s a scene. Memorize it. |
| HOLDAWAY | MAX | This better not be some Freddy joke. |
| MR. BROWN | MIN | My eyes! My eyes! I’m blind, I’m fucking blind! |
| MR. BROWN | MAX | I love this guy, he’s a madman, this guy. |
| MR. BLUE | MIN | These people are taxed on the tips they make. When you stiff ’em, you cost them money. |
| MR. BLUE | MAX | What’s something special, take ya in the kitchen and suck your dick? |
| MARVIN | MIN | What the fuck are they waiting for? That motherfucker cut off my ear! He slashed my face! I’m deformed! |
| MARVIN | MAX | I do. How do I look? |
| Parejas | Min_Max | Diálogo |
|---|---|---|
| MR. WHITE_MR. PINK | MIN | Well, then, I’m afraid I’m gonna have to keep it. |
| MR. WHITE_MR. PINK | MAX | He’s right about the ear, it’s hacked off. |
| MR. PINK_EDDIE | MIN | Have you lost your fucking mind? Put your gun down! |
| MR. PINK_EDDIE | MAX | Ahh, now we’re getting down to it. It’s not just that he’s a cheap bastard - |
| MR. WHITE_EDDIE | MIN | Well, then, I’m afraid I’m gonna have to keep it. |
| MR. WHITE_EDDIE | MAX | Mr. Orange, why don’t you tell me what really happened? |
| MR. WHITE_MR. ORANGE | MIN | You don’t have any idea what you’re talking about. These people bust their ass. This is a hard job. |
| MR. WHITE_MR. ORANGE | MAX | Look, I don’t wanna be a fly in the ointment, but if help doesn’t come soon, I gotta see a doctor. I don’t give a fuck about jail, I just don’t wanna die. |
## [1] "Lift Promedio de las Reglas de Asociación: 4.69485401156291"
## [1] "Desviación estándar del Lift de las Reglas de Asociación: 2.06590961676948"
## [1] "Deciles del Lift : "
## 10% 20% 30% 40% 50% 60% 70%
## 2.059233 2.723502 3.621324 4.766129 5.794118 5.794118 5.794118
## 80% 90% 100%
## 5.794118 5.794118 60.306122
| Número de Diálogos | Lift Mínimo | Lift Máximo |
|---|---|---|
| 1,412 | -1 | 1 |
| 404,562 | 1 | 3 |
| 338,678 | 3 | 5 |
| 784,746 | 5 | 7 |
| 7,238 | 7 | 9 |
| 21,168 | 9 | 11 |
## [1] "Leverage Promedio de las Reglas de Asociación: 0.0116943714177238"
## [1] "Desviación estándar del Leverage de las Reglas de Asociación: 0.0145948327654564"
## [1] "Deciles del Leverage : "
## 10% 20% 30% 40% 50% 60%
## 0.005247924 0.006542011 0.007000094 0.007000094 0.008400113 0.008649196
## 70% 80% 90% 100%
## 0.010032037 0.014000189 0.019147907 0.157867161
| Número de Diálogos | Leverage Mínimo | Leverage Máximo |
|---|---|---|
| 47,342 | -0.0027 | 0.0027 |
| 698,272 | 0.0027 | 0.0082 |
| 503,520 | 0.0082 | 0.014 |
| 172,574 | 0.014 | 0.019 |
| 75,806 | 0.019 | 0.024 |
| 10,676 | 0.024 | 0.03 |
Pagerank: Reservoir Dogs.